
* Load the raw panel and declare it as panel data (unit: customer, time: t)

use "$data/Non_payment_dataset.dta" , clear
xtset customer t


** Excluding Outliers **

* Excluding customers with negative payments
* (a customer whose payments are all missing has minpayment==. and is kept by this step)
bysort customer: egen minpayment=min(payment) 
drop if minpayment<0 

* Excluding customers with 0.1% largest payments
* The cut-off is the 99.9th percentile of the customer-level maximum payment, computed
* over observations with non-missing itt. It is read from the stored result of _pctile
* instead of being typed in by hand, so it stays correct if the input data change.
bysort customer: egen maxpayment=max(payment) 
_pctile maxpayment if itt!=. , nq(1000)
tempname paycutoff
scalar `paycutoff'=r(r999)
di scalar(`paycutoff') // the original script hard-coded 18561.6 at this point
* NOTE(review): a missing maxpayment compares as larger than any number, so customers
* without a single non-missing payment are dropped here as well (unchanged from the
* original behavior) - confirm that this is intended.
drop if maxpayment>scalar(`paycutoff')


***********************************************************************************************************************
**** Creating Variables  **********************************************************************************************
***********************************************************************************************************************

* Indicator for a strictly positive payment; stays missing where payment is missing (.)
gen paidcount=(payment>0) if payment!=.

* Pre-treatment is before September (t<=12). Started calling at the end of September (t=13). Customers received first messages in October (t=14)
local prewindow "t<=12"

* Customer-level pre-treatment statistics, at this point only filled in on pre-treatment rows
bysort customer: egen premeaninvoice=mean(invoices) if `prewindow'
bysort customer: egen premeanpayment=mean(payment) if `prewindow'
bysort customer: egen prepaidcounttotal=sum(paidcount) if `prewindow'
bysort customer: egen presuminvoice=sum(invoices) if `prewindow'
bysort customer: egen presumpayment=sum(payment) if `prewindow'
gen presumpaymentratio=presumpayment/presuminvoice if `prewindow'

* Closing balance in the last pre-treatment period
gen preclosingbalance=closingbalance if t==12 

* Copy each pre-treatment value onto every row of the customer by replacing the
* variable with its within-customer mean
foreach v in premeaninvoice premeanpayment preclosingbalance preage_of_account prepaidcounttotal presuminvoice presumpayment presumpaymentratio {
	tempvar spread
	bysort customer: egen `spread'=mean(`v')
	replace `v'=`spread'
	drop `spread'
}

* Log, inverse hyperbolic sine and cube-root transformations
* ln(payment) is missing for payment==0; lnpayment1 shifts by one so that zeros are kept.
gen asinhpayment=asinh(payment)
* The variable name is written out in full (invoices). The original used the
* abbreviation "invoice", which only resolves while no other variable name starts
* with "invoice" and variable abbreviation is switched on.
* NOTE(review): assumes the data contain no separate variable named exactly "invoice" - confirm
gen asinhinvoice=asinh(invoices)
gen lnpayment=ln(payment)
gen lnpayment1=ln(payment+1)
gen cubepayment=payment^(1/3)

* Winsorize at p(0.01); each result is stored in a new variable with suffix "w"
* NOTE(review): the original comment calls this "top coding"; without highonly/lowonly
* winsor is understood to trim both tails - confirm against the winsor help file
foreach v in closingbalance invoices payment premeaninvoice preage_of_account preclosingbalance premeanpayment {
	winsor `v', gen(`v'w) p(0.01)
}

* Payment conditional on having paid something (missing for zero or missing payments)
gen paymentcond=payment if payment>0 & payment!=.

foreach v in paymentcond presumpaymentratio {
	winsor `v', gen(`v'w) p(0.01)
}

* Generating variables For Entropy Balancing and CEM-Matching 

* Log payment, kept only where it is strictly positive
gen averagepayment=lnpayment if lnpayment>0  & lnpayment!=.

* Customer means over the two halves of the pre-treatment period
* (half 1: t<7, half 2: t>6 & t<13); customers without any value get 0
local half1 "t<7"
local half2 "t>6 & t<13"
forvalues h=1/2 {
	bysort customer: egen meanaveragepayment`h'_2=mean(averagepayment) if `half`h''
	bysort customer: egen meanaveragepayment`h'=mean(meanaveragepayment`h'_2)
	replace meanaveragepayment`h'=0 if meanaveragepayment`h'==.
}

* Customer mean of asinh(invoices) over the whole pre-treatment period
bysort customer: egen meanasinhinvoice2=mean(asinhinvoice) if t<13
bysort customer: egen meanasinhinvoice=mean(meanasinhinvoice2)

* No-consumption indicator: 1 for a zero invoice, 0 for a positive one, missing otherwise
gen noconsumption=(invoices==0) if invoices>=0 & invoices!=.
bysort customer: egen prenoconsumption=mean(noconsumption) if t<13

* Share of periods with a positive payment, by half of the pre-treatment period
forvalues h=1/2 {
	bysort customer: egen paidcount`h'_2=mean(paidcount) if `half`h''
	bysort customer: egen paidcount`h'=mean(paidcount`h'_2)
}

* Inactive = no consumption in the last pre-treatment period (t==12), copied to all rows
gen inactive=noconsumption if t==12
bysort customer: egen inactiveall=mean(inactive)

** Attrition codings

* First type of attrition - Customer account shows up in data but is deactivated
* (neither payment nor invoice recorded in t==22)
gen attrited=(payment==. & invoices==. & t==22)

* Second type of attrition - Customer account does not show up in data anymore, but customer was treated
* moreattrition2 exists only on a t==22 row, so its customer mean is missing exactly
* for customers without such a row; those are set to 1
gen moreattrition2=0 if t==22
bysort customer: egen moreattrition=mean(moreattrition2)
replace moreattrition=1 if moreattrition==.

* Either type of attrition
gen attritionrate=(moreattrition==1 | attrited==1)

** Dummy if customer ever attrited
bysort customer: egen everattrited=max(attritionrate)
gen attrition=itt if everattrited==1


** Table 4. No Heterogeneous Treatment Effects with Respect to Sanctioning Probability

** Customers can get cut if they did not pay a bill for more than 45 days. 
* NOTE(review): the rule above says 45 days while the comment below speaks of debt
* older than 60 days - confirm which figure is meant.
xtset customer t
* At time t a customer does not yet know about consumption in t, so concurrent
* consumption needs to be subtracted. We are measuring those who have certainly debt
* older than 60 days.
gen outstanding=closingbalancew-invoices-l.invoices-l2.invoices

* t==12 is the last invoice the customer has technically available at time of the intervention
gen threat2=0 if outstanding<=0 & t==12
* A missing value compares as larger than any number, so without the "<." guard a
* customer whose outstanding amount is unknown in t==12 would be coded as above the
* cut-off threshold. Such customers now keep a missing threat2.
replace threat2=1 if outstanding>0 & outstanding<. & t==12  
bysort customer: egen threat=mean(threat2)

gen debtgroup=0 if preclosingbalance<=0 // dummy for prepayers group
replace debtgroup=1 if threat==0 & preclosingbalance>0 // dummy for "below cut-off threshold" group
replace debtgroup=2 if threat==1 // dummy for "above cut-off threshold" group

* Treatment-arm by debt-group dummies: itt2low ... itt3veryhigh
* (low = debtgroup 0, high = debtgroup 1, veryhigh = debtgroup 2)
foreach arm in 2 3 {
	local grp=0
	foreach level in low high veryhigh {
		gen itt`arm'`level'=0 if itt!=.
		replace itt`arm'`level'=1 if itt`arm'==1 & debtgroup==`grp'
		local ++grp
	}
}


** Table A13. ITT Effects by Probability of Being on a Sanctioning List **

* Customer mean of the closing balance over the pre-treatment period, winsorized
bysort customer: egen meanpreclosingbalance2=mean(closingbalance) if t<13 
bysort customer: egen meanpreclosingbalance=mean(meanpreclosingbalance2)
winsor meanpreclosingbalance, gen(meanpreclosingbalancew) p(0.01)

* Debt measured in average invoices
gen predso=meanpreclosingbalancew/premeaninvoicew

* Median split into low (0) and high (1) debt types
xtile dsomedian = predso if itt!=., nq(2)
replace dsomedian=dsomedian-1

tab dsomedian

* Treatment-arm by debt-type dummies: itt2lowdso, itt2highdso, itt3lowdso, itt3highdso
foreach arm in 2 3 {
	local half=0
	foreach level in low high {
		gen itt`arm'`level'dso=0 if itt!=.
		replace itt`arm'`level'dso=1 if itt`arm'==1 & dsomedian==`half'
		local ++half
	}
}



** Table A14. No Heterogeneous Treatment Effects with Respect to Distance to
* NOTE(review): the table title above is cut off in the source - complete it from the paper.

* Mean of distance over t==12 observations with non-missing itt
sum distance if t==12 & itt!=.

* Mean center using the stored result of the summarize call directly above.
* The original script typed in the displayed value 36.91209 by hand.
* Note that distance itself is overwritten with its centered version.
replace distance=distance-r(mean)
gen asinhdistance=asinh(distance)

* Interactions of the treatment arms with asinh of centered distance
gen itt2asinhdistancedistance=itt2*asinhdistance
gen itt3asinhdistancedistance=itt3*asinhdistance

** Table A15. Heterogeneous Treatment Effects w.r.t. Language

* eng is 1 for lang "E", 0 for lang "O", and missing for any other value of lang
gen eng=(lang=="E") if inlist(lang,"O","E")

* Interactions of the treatment arms with the language dummy
gen itt2xeng=itt2*eng 
gen itt3xeng=itt3*eng

* Store the prepared data set
save "$data/Non_payment_dataset_ready.dta" , replace
